package p2tokenstream;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
import org.junit.Test;

import java.io.IOException;

/**
 * Demonstration tests that feed the same sample sentence to several Lucene
 * analyzers and hand each analyzer to {@code TokenAttributeUtils} so their
 * tokenization can be compared side by side.
 *
 * <p>These tests make no assertions; they only invoke the display helpers.
 */
public class TestToken {

    /**
     * Creates one instance of each analyzer being compared, in the order
     * Standard, Stop, Simple, Whitespace, all configured for
     * {@code Version.LUCENE_35}.
     *
     * @return a fresh four-element array of analyzers
     */
    private static Analyzer[] analyzersUnderComparison() {
        return new Analyzer[] {
                new StandardAnalyzer(Version.LUCENE_35),
                new StopAnalyzer(Version.LUCENE_35),
                new SimpleAnalyzer(Version.LUCENE_35),
                new WhitespaceAnalyzer(Version.LUCENE_35)
        };
    }

    /**
     * Passes a sample sentence to {@code TokenAttributeUtils.displayToken}
     * once per analyzer.
     *
     * <p>Observation recorded by the original author (not verifiable from this
     * file alone): the analyzers use different tokenization strategies. For
     * example, StandardAnalyzer does not emit the word "this", and for
     * "majian@jd.com" it does not emit the "@" symbol and treats "jd.com" as a
     * single token.
     *
     * @throws IOException if the display helper fails while reading the token stream
     */
    @Test
    public void test1() throws IOException {
        final String sample = "this is my house,I am com from china，my mail is majian@jd.com,123456";
        for (Analyzer analyzer : analyzersUnderComparison()) {
            TokenAttributeUtils.displayToken(sample, analyzer);
        }
    }

    /**
     * Passes a sample sentence to
     * {@code TokenAttributeUtils.displayAllTokenInfo} once per analyzer.
     *
     * @throws IOException if the display helper fails while reading the token stream
     */
    @Test
    public void test2() throws IOException {
        final String sample = "this is my house,I am com from yunang zhaotong，majian@jd.com,123456";
        for (Analyzer analyzer : analyzersUnderComparison()) {
            TokenAttributeUtils.displayAllTokenInfo(sample, analyzer);
        }
    }
}
